library(tibble)
library(dplyr)
library(foreign)Think about a hypothetical event: e.g. birth, death, partnering, marriage or commitment to a partner, entering a program, leaving a program, attaining a degree, first diagnosis of anxiety or depression, etc.
N <- 10 # sample size
T <- 10 # number of timepoints
id <- rep(seq(1, N), each = T) # id's 1 to N
t <- rep(seq(1, T), N) # timepoints 1 to T
# random event times
# uniform event times
event_time <- rep(round(runif(N, 3, T),
digits = 0),
each = T)
event <- t >= event_time # event has occurred t >= event_time
event <- factor(event,
levels = c(FALSE, TRUE),
labels = c("No Event", "Event"))
# arbitrarily censored after 7
censored <- event_time > 7
censored <- factor(censored,
levels = c(FALSE, TRUE),
labels = c("Not Censored", "Censored"))
# tibble
# data required for animation
mydata <- tibble::tibble(id, t, event_time, event, censored)
mydata2 <- mydata %>%
filter(t == 1) %>%
select(id, event_time, censored)write.dta(mydata, file = "event-history-multiple-records.dta")
write.dta(mydata2, file = "event-history-single-record.dta")library(ggplot2)
library(plotly)Individuals in the animation below who have not yet experienced an event are indicated by a ●.
When an event occurs for an individual, the symbol changes to a ×.
In this simulation, we imagine that the study period ends after time 7, so observations for which the event occurs after time 7 are considered to be censored: i.e. a failure is not observed.
Censored observations (failure not observed) are maize ⬤, and non-censored observations (failure observed) are blue ⬤ .
pal <- c("#00274C", "#FFCB05") # color palette
p2 <- plot_ly(data = mydata, # use mydata
x = ~t, # x is t (time)
y = ~id, # y is id
frame = ~t, # frames based on t (time)
text = ~paste("Event:" ,
event,
"<br>Censored:",
censored),
type = 'scatter',
mode = 'marker',
color = ~censored, # color is censored (yes / no)
colors = pal, # use color palette
symbol = ~as.numeric(event), # symbol is event (occurred / not occurred)
symbols = c('circle', # event not occurred
'x'), # event
marker = list(size = 10)) %>% # marker size
layout(title = 'Hypothetical Timing of Events \nCensored at Time 7',
shapes = list(type='line', # censoring line
x0 = 7,
x1 = 7,
y0 = 0,
y1 = 10,
line=list(dash='dot', width=1))) %>%
animation_opts(3000)
p2 # replaylibrary(DT)datatable(mydata2, caption = "Simple Event History Data")Notice how every row in this particular data set is a person timepoint, not simply a person. Every person in this data has multiple rows.
datatable(mydata, caption = "Data in Multiple Record Form")